# Load data ----
# The .rdata file supplies the object WV6_Data_v_2015_04_18; it is aliased to
# the shorter name `wv6` for the rest of the script.
# NOTE(review): presumably the World Values Survey wave 6 (2010-2014) release,
# judging by the path -- confirm.
load(file = "~/Desktop/thesis_data/wv6_2010-2014/WV6_Data_r_v_2015_04_18.rdata")
wv6 <- WV6_Data_v_2015_04_18
library(plyr)
library(Matrix)
library(Rcpp)
library(lme4)
library(QMSS)
library(doBy)
library(vcd)
library(lme4)
library(ggplot2)
library(usdm)
library(xlsx)

# Preprocess data ----
# Keep only respondents with a positive (> 0) code on every analysis item.
# NOTE(review): non-positive codes are presumably the survey's missing-value
# markers -- confirm against the codebook.
required_items <- c(
  "V105", "V102", "V103", "V104", "V240", "V57", "V23", "V10",
  "V96", "V11", "V239", "V248", "V242", "V147", "V229"
)
for (item in required_items) {
  # which() also discards rows where the comparison evaluates to NA.
  wv6 <- wv6[which(wv6[[item]] > 0), ]
}
# V10 is additionally required to be below 5.
wv6 <- wv6[which(wv6$V10 < 5), ]
# Reverse-code items via 5 - x (maps 1 -> 4, 2 -> 3, 3 -> 2, 4 -> 1).
# NOTE(review): this assumes every item listed is on a 1..4 scale; only V10 is
# explicitly restricted to < 5 by the filters -- confirm for the others.
flipped_items <- c("V10", "V102", "V105", "V103", "V104", "V11")
for (item in flipped_items) {
  wv6[[item]] <- 5 - wv6[[item]]
}

# par_trust is the mean of the (already reverse-coded) V103 and V104.
wv6$par_trust <- (wv6$V103 + wv6$V104) / 2

# Give the remaining trust items readable names (plyr::rename, old = new).
wv6 <- rename(wv6, c("V102" = "fam_trust", "V105" = "gen_trust"))

# 0/1 indicators: married is 1 when V57 == 1, male is 1 when V240 == 1.
wv6$married <- ifelse(wv6$V57 == 1, 1, 0)
wv6$male <- ifelse(wv6$V240 == 1, 1, 0)
# Translate the numeric country code in V2 into a country name, then rename
# the column to `country`.
#
# A single named lookup vector replaces one assignment per country: the column
# is scanned once, and the conversion to character is explicit instead of
# happening as a side effect of the first string assignment.
country_names <- c(
  "12" = "Algeria",
  "31" = "Azerbaijan",
  "32" = "Argentina",
  "36" = "Australia",
  "48" = "Bahrain",
  "51" = "Armenia",
  "76" = "Brazil",
  "112" = "Belarus",
  "152" = "Chile",
  "156" = "China",
  "158" = "Taiwan",
  "170" = "Colombia",
  "196" = "Cyprus",
  "218" = "Ecuador",
  "233" = "Estonia",
  "268" = "Georgia",
  "275" = "Palestine",
  "276" = "Germany",
  "288" = "Ghana",
  "344" = "Hong Kong",
  "356" = "India",
  "368" = "Iraq",
  "392" = "Japan",
  "398" = "Kazakhstan",
  "400" = "Jordan",
  "410" = "South Korea",
  "414" = "Kuwait",
  "417" = "Kyrgyzstan",
  "422" = "Lebanon",
  "434" = "Libya",
  "458" = "Malaysia",
  "484" = "Mexico",
  "504" = "Morocco",
  "528" = "Netherlands",
  "566" = "Nigeria",
  "586" = "Pakistan",
  "604" = "Peru",
  "608" = "Philippines",
  "616" = "Poland",
  "634" = "Qatar",
  "642" = "Romania",
  "643" = "Russia",
  "646" = "Rwanda",
  "702" = "Singapore",
  "705" = "Slovenia",
  "710" = "South Africa",
  "716" = "Zimbabwe",
  "724" = "Spain",
  "752" = "Sweden",
  "764" = "Thailand",
  "780" = "Trinidad and Tobago",
  "788" = "Tunisia",
  "792" = "Turkey",
  "804" = "Ukraine",
  "818" = "Egypt",
  "840" = "US",
  "858" = "Uruguay",
  "860" = "Uzbekistan",
  "887" = "Yemen"
)

country_codes <- as.character(wv6$V2)
country_labels <- unname(country_names[country_codes])

# Codes without an entry in the lookup keep their code (as a string), which is
# what the one-assignment-per-country version left behind as well.
wv6$V2 <- ifelse(is.na(country_labels), country_codes, country_labels)

wv6 <- rename(wv6, c("V2" = "country"))
# Give the remaining survey items readable names (plyr::rename, old = new).
wv6 <- rename(wv6, c(
  "V10" = "happy",
  "V11" = "health",
  "V23" = "satlife",
  "V239" = "income",
  "V147" = "religious",
  "V248" = "education",
  "V229" = "employment"
))

# Respondent counts per country. The country names are values of the `country`
# column; there is no `China` column, so table(wv6$China) tabulated NULL.
table(wv6$country)
### Add country-level variables ----
# Country-level data are read from the sheet named "sheet" and joined on
# `country`. merge() defaults to an inner join, so respondents whose country
# has no row in the spreadsheet are dropped.
mydata <- read.xlsx("~/Desktop/country_mac_data.xlsx", sheetName = "sheet")
wv6 <- merge(wv6, mydata, by = "country")

# Overall trust score: row-wise mean of the three trust measures. This is a
# per-respondent calculation, so no grouping by country is needed.
wv6$sum_trust <- (wv6$fam_trust + wv6$par_trust + wv6$gen_trust) / 3

# Log of GDP.per.cap, and GINI.Index divided by 100.
# The former ddply(wv6, "country", mutate, Gini) and
# ddply(wv6, "country", mutate, ln.GDP) calls were removed: plyr::mutate
# silently ignores unnamed arguments, so they added or changed no column.
wv6$ln.GDP <- log(wv6$GDP.per.cap)
wv6$Gini <- wv6$GINI.Index / 100

# Summary of the trust score within each income level.
with(wv6, by(sum_trust, income, summary))